png("Figure2bw.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust4, border = NA, withlegend = T, cpal=pal)
dev.off()
###clustering - 6 clusters
wardRange3 <- as.clustrange(wardClust, diss=dist.om1, ncluster = 6)
summary(wardRange3, max.rank = 6)
MPCareer$clust6 <- factor(wardRange3$clustering$cluster6, levels = c(1, 2, 3, 4, 5, 6), labels = c("Career 1", "Career 2", "Career 3", "Career 4", "Career 5", "Career 6"))
png("Figure3.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust6, border = NA, withlegend = T)
dev.off()
png("Figure3bw.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust6, border = NA, withlegend = T, cpal=pal)
dev.off()
###clustering - 3 clusters
wardRange4 <- as.clustrange(wardClust, diss=dist.om1, ncluster = 3)
summary(wardRange4, max.rank = 10)
MPCareer$clust3 <- factor(wardRange1$clustering$cluster3, levels = c(1, 2, 3), labels = c("Career 1", "Career 2", "Career 3"))
png("Figure4.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust3, border = NA, withlegend = T)
dev.off()
png("Figure4bw.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust3, border = NA, withlegend = T, cpal=pal)
dev.off()
###clustering - 7 clusters
wardRange5 <- as.clustrange(wardClust, diss=dist.om1, ncluster = 7)
summary(wardRange5, max.rank = 7)
MPCareer$clust7 <- factor(wardRange5$clustering$cluster7, levels = c(1, 2, 3, 4, 5, 6, 7), labels = c("Career 1", "Career 2", "Career 3", "Career 4", "Career 5", "Career 6", "Career 7"))
png("Figure5.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust7, border = NA, withlegend = T)
dev.off()
png("Figure5bw.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust7, border = NA, withlegend = T, cpal=pal)
dev.off()
###clustering - 8 clusters
wardRange6 <- as.clustrange(wardClust, diss=dist.om1, ncluster = 8)
summary(wardRange6, max.rank = 20)
MPCareer$clust8 <- factor(wardRange6$clustering$cluster8, levels = c(1, 2, 3, 4, 5, 6, 7, 8), labels = c("Career 1", "Career 2", "Career 3", "Career 4", "Career 5", "Career 6", "Career 7", "Career 8"))
png("Figure6.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust8, border = NA, withlegend = T)
dev.off()
png("Figure6bw.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust8, border = NA, withlegend = T, cpal=pal)
dev.off()
View(MPCareer)
set.seed(1234)
setwd("C:/Users/matiav/Documents/Conference and Publications/Research and Politics 1/1R&R/sequence analysis")
MPCareer = read_excel("dataset 27_8_14 checked 10_5_16.xlsx")
library(data.table)
MPCareer_clust <- MPCareer[c(1,2,3,28:45)]
setnames(MPCareer_clust, old=c("1",  "2",  "3",  "4",  "5",  "6",  "7",  "8"  ,"9",  "10", "11", "12", "13", "14" ,"15" ,"16", "17", "18"), new=c("s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",  "s8"  ,"s9",  "s10", "s11", "s12", "s13", "s14" ,"s15" ,"s16", "s17", "s18"))
MPCareer_clust$s1[is.na(MPCareer_clust$s1)]<- 99
MPCareer_clust$s2[is.na(MPCareer_clust$s2)]<- 99
MPCareer_clust$s3[is.na(MPCareer_clust$s3)]<- 99
MPCareer_clust$s4[is.na(MPCareer_clust$s4)]<- 99
MPCareer_clust$s5[is.na(MPCareer_clust$s5)]<- 99
MPCareer_clust$s6[is.na(MPCareer_clust$s6)]<- 99
MPCareer_clust$s7[is.na(MPCareer_clust$s7)]<- 99
MPCareer_clust$s8[is.na(MPCareer_clust$s8)]<- 99
MPCareer_clust$s9[is.na(MPCareer_clust$s9)]<- 99
MPCareer_clust$s10[is.na(MPCareer_clust$s10)]<- 99
MPCareer_clust$s11[is.na(MPCareer_clust$s11)]<- 99
MPCareer_clust$s12[is.na(MPCareer_clust$s12)]<- 99
MPCareer_clust$s13[is.na(MPCareer_clust$s13)]<- 99
MPCareer_clust$s14[is.na(MPCareer_clust$s14)]<- 99
MPCareer_clust$s15[is.na(MPCareer_clust$s15)]<- 99
MPCareer_clust$s16[is.na(MPCareer_clust$s16)]<- 99
MPCareer_clust$s17[is.na(MPCareer_clust$s17)]<- 99
MPCareer_clust$s18[is.na(MPCareer_clust$s18)]<- 99
fviz_nbclust(MPCareer_clust[c(4:21)], hcut, method = "wss") +
geom_vline(xintercept = 4, linetype = 2)
##
clusters <- hclust(dist(MPCareer_clust[c(3:20)]))
clusterCut <- cutree(clusters, 8)
MPCareer_clust$newclust8<-clusterCut
##
MPCareer_merged<-merge(MPCareer, MPCareer_clust, by=c("id"), all.x=TRUE)
table(MPCareer_merged$clust8, MPCareer_merged$newclust8)
View(MPCareer_merged)
View(MPCareer)
MPCareer = read_excel("dataset 27_8_14 checked 10_5_16.xlsx")
print(MPCareer)
MPCareer <- MPCareer[MPCareer$party_n != 3, ]
for(level in unique(MPCareer$legislature)){         #create dummies for legislature
MPCareer[paste("dummyleg", level, sep = "_")] <- ifelse(MPCareer$legislature == level, 1, 0)
}
for(level in unique(MPCareer$party_n)){         #create dummies for party
MPCareer[paste("dummyparty", level, sep = "_")] <- ifelse(MPCareer$party_n == level, 1, 0)
}
for(level in unique(MPCareer$uni)){         #create dummies for uni
MPCareer[paste("dummyuni", level, sep = "_")] <- ifelse(MPCareer$uni == level, 1, 0)
}
MPCareer.inside.labels <- c("Backbench", "Frontbench", "Great Office")
MPCareer.inside.scode <- c(0,1,2)
MPCareer.inside.seq <- seqdef(MPCareer, 28:45, states = MPCareer.inside.scode,labels=MPCareer.inside.labels, missing.color="white", left="DEL")
pal <- grey(0:8/8)
###distance matrix
submat <- seqsubm(MPCareer.inside.seq, method = "TRATE", with.missing=FALSE)
dist.om1 <- seqdist(MPCareer.inside.seq, method = "OM",sm = submat, with.missing=TRUE)
###clustering - 5 clusters
wardClust <- hclust(as.dist(dist.om1), method = "ward.D")
wardRange1 <- as.clustrange(wardClust, diss=dist.om1, ncluster = 5)
summary(wardRange1, max.rank = 5)
MPCareer$clust5 <- factor(wardRange1$clustering$cluster5, levels = c(1, 2, 3, 4, 5), labels = c("Career 1", "Career 2", "Career 3", "Career 4", "Career 5"))
png("Figure1.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust5, border = NA, withlegend = T)
dev.off()
png("Figure1bw.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust5, border = NA, withlegend = T, cpal=pal)
dev.off()
###clustering - 4 clusters
wardRange2 <- as.clustrange(wardClust, diss=dist.om1, ncluster = 4)
summary(wardRange2, max.rank = 10)
MPCareer$clust4 <- factor(wardRange2$clustering$cluster4, levels = c(1, 2, 3, 4), labels = c("Career 1", "Career 2", "Career 3", "Career 4"))
png("Figure2.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust4, border = NA, withlegend = T)
dev.off()
png("Figure2bw.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust4, border = NA, withlegend = T, cpal=pal)
dev.off()
###clustering - 6 clusters
wardRange3 <- as.clustrange(wardClust, diss=dist.om1, ncluster = 6)
summary(wardRange3, max.rank = 6)
MPCareer$clust6 <- factor(wardRange3$clustering$cluster6, levels = c(1, 2, 3, 4, 5, 6), labels = c("Career 1", "Career 2", "Career 3", "Career 4", "Career 5", "Career 6"))
png("Figure3.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust6, border = NA, withlegend = T)
dev.off()
png("Figure3bw.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust6, border = NA, withlegend = T, cpal=pal)
dev.off()
###clustering - 3 clusters
wardRange4 <- as.clustrange(wardClust, diss=dist.om1, ncluster = 3)
summary(wardRange4, max.rank = 10)
MPCareer$clust3 <- factor(wardRange1$clustering$cluster3, levels = c(1, 2, 3), labels = c("Career 1", "Career 2", "Career 3"))
png("Figure4.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust3, border = NA, withlegend = T)
dev.off()
png("Figure4bw.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust3, border = NA, withlegend = T, cpal=pal)
dev.off()
###clustering - 7 clusters
wardRange5 <- as.clustrange(wardClust, diss=dist.om1, ncluster = 7)
summary(wardRange5, max.rank = 7)
MPCareer$clust7 <- factor(wardRange5$clustering$cluster7, levels = c(1, 2, 3, 4, 5, 6, 7), labels = c("Career 1", "Career 2", "Career 3", "Career 4", "Career 5", "Career 6", "Career 7"))
png("Figure5.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust7, border = NA, withlegend = T)
dev.off()
png("Figure5bw.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust7, border = NA, withlegend = T, cpal=pal)
dev.off()
###clustering - 8 clusters
wardRange6 <- as.clustrange(wardClust, diss=dist.om1, ncluster = 8)
summary(wardRange6, max.rank = 20)
MPCareer$clust8 <- factor(wardRange6$clustering$cluster8, levels = c(1, 2, 3, 4, 5, 6, 7, 8), labels = c("Career 1", "Career 2", "Career 3", "Career 4", "Career 5", "Career 6", "Career 7", "Career 8"))
png("Figure6.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust8, border = NA, withlegend = T)
dev.off()
png("Figure6bw.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust8, border = NA, withlegend = T, cpal=pal)
dev.off()
set.seed(1234)
library(data.table)
MPCareer_clust <- MPCareer[c(1,2,3,28:45)]
setnames(MPCareer_clust, old=c("1",  "2",  "3",  "4",  "5",  "6",  "7",  "8"  ,"9",  "10", "11", "12", "13", "14" ,"15" ,"16", "17", "18"), new=c("s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",  "s8"  ,"s9",  "s10", "s11", "s12", "s13", "s14" ,"s15" ,"s16", "s17", "s18"))
MPCareer_clust$s1[is.na(MPCareer_clust$s1)]<- 99
MPCareer_clust$s2[is.na(MPCareer_clust$s2)]<- 99
MPCareer_clust$s3[is.na(MPCareer_clust$s3)]<- 99
MPCareer_clust$s4[is.na(MPCareer_clust$s4)]<- 99
MPCareer_clust$s5[is.na(MPCareer_clust$s5)]<- 99
MPCareer_clust$s6[is.na(MPCareer_clust$s6)]<- 99
MPCareer_clust$s7[is.na(MPCareer_clust$s7)]<- 99
MPCareer_clust$s8[is.na(MPCareer_clust$s8)]<- 99
MPCareer_clust$s9[is.na(MPCareer_clust$s9)]<- 99
MPCareer_clust$s10[is.na(MPCareer_clust$s10)]<- 99
MPCareer_clust$s11[is.na(MPCareer_clust$s11)]<- 99
MPCareer_clust$s12[is.na(MPCareer_clust$s12)]<- 99
MPCareer_clust$s13[is.na(MPCareer_clust$s13)]<- 99
MPCareer_clust$s14[is.na(MPCareer_clust$s14)]<- 99
MPCareer_clust$s15[is.na(MPCareer_clust$s15)]<- 99
MPCareer_clust$s16[is.na(MPCareer_clust$s16)]<- 99
MPCareer_clust$s17[is.na(MPCareer_clust$s17)]<- 99
MPCareer_clust$s18[is.na(MPCareer_clust$s18)]<- 99
fviz_nbclust(MPCareer_clust[c(4:21)], hcut, method = "wss") +
geom_vline(xintercept = 4, linetype = 2)
##
clusters <- hclust(dist(MPCareer_clust[c(3:20)]))
clusterCut <- cutree(clusters, 8)
MPCareer_clust$newclust8<-clusterCut
##
MPCareer_merged<-merge(MPCareer, MPCareer_clust, by=c("id"), all.x=TRUE)
table(MPCareer_merged$clust8, MPCareer_merged$newclust8)
View(MPCareer)
View(MPCareer_clust)
tbl<-table(MPCareer_merged$clust8, MPCareer_merged$newclust8)
chisq.test(tbl)
View(MPCareer_merged)
?dist
set.seed(1234)
library(data.table)
MPCareer_clust <- MPCareer[c(1,2,3,28:45)]
setnames(MPCareer_clust, old=c("1",  "2",  "3",  "4",  "5",  "6",  "7",  "8"  ,"9",  "10", "11", "12", "13", "14" ,"15" ,"16", "17", "18"), new=c("s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",  "s8"  ,"s9",  "s10", "s11", "s12", "s13", "s14" ,"s15" ,"s16", "s17", "s18"))
MPCareer_clust$s1[is.na(MPCareer_clust$s1)]<- 99
MPCareer_clust$s2[is.na(MPCareer_clust$s2)]<- 99
MPCareer_clust$s3[is.na(MPCareer_clust$s3)]<- 99
MPCareer_clust$s4[is.na(MPCareer_clust$s4)]<- 99
MPCareer_clust$s5[is.na(MPCareer_clust$s5)]<- 99
MPCareer_clust$s6[is.na(MPCareer_clust$s6)]<- 99
MPCareer_clust$s7[is.na(MPCareer_clust$s7)]<- 99
MPCareer_clust$s8[is.na(MPCareer_clust$s8)]<- 99
MPCareer_clust$s9[is.na(MPCareer_clust$s9)]<- 99
MPCareer_clust$s10[is.na(MPCareer_clust$s10)]<- 99
MPCareer_clust$s11[is.na(MPCareer_clust$s11)]<- 99
MPCareer_clust$s12[is.na(MPCareer_clust$s12)]<- 99
MPCareer_clust$s13[is.na(MPCareer_clust$s13)]<- 99
MPCareer_clust$s14[is.na(MPCareer_clust$s14)]<- 99
MPCareer_clust$s15[is.na(MPCareer_clust$s15)]<- 99
MPCareer_clust$s16[is.na(MPCareer_clust$s16)]<- 99
MPCareer_clust$s17[is.na(MPCareer_clust$s17)]<- 99
MPCareer_clust$s18[is.na(MPCareer_clust$s18)]<- 99
fviz_nbclust(MPCareer_clust[c(4:21)], hcut, method = "euclidean") +
geom_vline(xintercept = 4, linetype = 2)
library(factoextra)
library(dendextend)
library(dplyr) # for data cleaning
library(cluster) # for gower similarity and pam
library(Rtsne) # for t-SNE plot
library(ggplot2) # for visualization
library(gower)
library(readstata13)
library(TraMineR)
library(foreign)
library(cluster)
library(nnet)
library(texreg)
library(dcolumn)
library(booktabs)
require(foreign)
require(ggplot2)
require(MASS)
require(Hmisc)
require(reshape2)
library(WeightedCluster)
library(Rgraphviz)
library(TraMineRextras)
library(readxl)
library(plyr)
library(plm)
library(lmtest)
library(futile.logger)
set.seed(1234)
library(data.table)
MPCareer_clust <- MPCareer[c(1,2,3,28:45)]
setnames(MPCareer_clust, old=c("1",  "2",  "3",  "4",  "5",  "6",  "7",  "8"  ,"9",  "10", "11", "12", "13", "14" ,"15" ,"16", "17", "18"), new=c("s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",  "s8"  ,"s9",  "s10", "s11", "s12", "s13", "s14" ,"s15" ,"s16", "s17", "s18"))
MPCareer_clust$s1[is.na(MPCareer_clust$s1)]<- 99
MPCareer_clust$s2[is.na(MPCareer_clust$s2)]<- 99
MPCareer_clust$s3[is.na(MPCareer_clust$s3)]<- 99
MPCareer_clust$s4[is.na(MPCareer_clust$s4)]<- 99
MPCareer_clust$s5[is.na(MPCareer_clust$s5)]<- 99
MPCareer_clust$s6[is.na(MPCareer_clust$s6)]<- 99
MPCareer_clust$s7[is.na(MPCareer_clust$s7)]<- 99
MPCareer_clust$s8[is.na(MPCareer_clust$s8)]<- 99
MPCareer_clust$s9[is.na(MPCareer_clust$s9)]<- 99
MPCareer_clust$s10[is.na(MPCareer_clust$s10)]<- 99
MPCareer_clust$s11[is.na(MPCareer_clust$s11)]<- 99
MPCareer_clust$s12[is.na(MPCareer_clust$s12)]<- 99
MPCareer_clust$s13[is.na(MPCareer_clust$s13)]<- 99
MPCareer_clust$s14[is.na(MPCareer_clust$s14)]<- 99
MPCareer_clust$s15[is.na(MPCareer_clust$s15)]<- 99
MPCareer_clust$s16[is.na(MPCareer_clust$s16)]<- 99
MPCareer_clust$s17[is.na(MPCareer_clust$s17)]<- 99
MPCareer_clust$s18[is.na(MPCareer_clust$s18)]<- 99
fviz_nbclust(MPCareer_clust[c(4:21)], hcut, method = "euclidean") +
geom_vline(xintercept = 4, linetype = 2)
##
fviz_nbclust(MPCareer_clust[c(4:21)], hcut, method = "wss") +
geom_vline(xintercept = 4, linetype = 2)
clusters <- hclust(dist(MPCareer_clust[c(3:20)],method = "euclidean"))
clusterCut <- cutree(clusters, 8)
MPCareer_clust$newclust8<-clusterCut
set.seed(1234)
library(data.table)
MPCareer_clust <- MPCareer[c(1,2,3,28:45)]
setnames(MPCareer_clust, old=c("1",  "2",  "3",  "4",  "5",  "6",  "7",  "8"  ,"9",  "10", "11", "12", "13", "14" ,"15" ,"16", "17", "18"), new=c("s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",  "s8"  ,"s9",  "s10", "s11", "s12", "s13", "s14" ,"s15" ,"s16", "s17", "s18"))
clusters <- hclust(dist(MPCareer_clust[c(3:20)],method = "euclidean"))
library(foreign)
write.dta(MPCareer, "C:/Users/matiav/Documents/Conference and Publications/Research and Politics 1/1R&R/sequence analysis/MPCareer.dta")
###
###Oxbridge
MPCareer_uni <- MPCareer[ which(MPCareer$uni!=0), ]
MPCareer_uni$Oxbridge<-1
MPCareer_uni$Oxbridge[is.na(MPCareer_uni$`Oxbridge college1`)&is.na(MPCareer_uni$`Oxbridge college2`)&is.na(MPCareer_uni$`Oxbridge college3`)] <- 0
table(MPCareer_uni$Oxbridge)
?dist
set.seed(1234)
library(data.table)
MPCareer_clust <- MPCareer[c(1,2,3,28:45)]
setnames(MPCareer_clust, old=c("1",  "2",  "3",  "4",  "5",  "6",  "7",  "8"  ,"9",  "10", "11", "12", "13", "14" ,"15" ,"16", "17", "18"), new=c("s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",  "s8"  ,"s9",  "s10", "s11", "s12", "s13", "s14" ,"s15" ,"s16", "s17", "s18"))
MPCareer_clust$s1[is.na(MPCareer_clust$s1)]<- 99
MPCareer_clust$s2[is.na(MPCareer_clust$s2)]<- 99
MPCareer_clust$s3[is.na(MPCareer_clust$s3)]<- 99
MPCareer_clust$s4[is.na(MPCareer_clust$s4)]<- 99
MPCareer_clust$s5[is.na(MPCareer_clust$s5)]<- 99
MPCareer_clust$s6[is.na(MPCareer_clust$s6)]<- 99
MPCareer_clust$s7[is.na(MPCareer_clust$s7)]<- 99
MPCareer_clust$s8[is.na(MPCareer_clust$s8)]<- 99
MPCareer_clust$s9[is.na(MPCareer_clust$s9)]<- 99
MPCareer_clust$s10[is.na(MPCareer_clust$s10)]<- 99
MPCareer_clust$s11[is.na(MPCareer_clust$s11)]<- 99
MPCareer_clust$s12[is.na(MPCareer_clust$s12)]<- 99
MPCareer_clust$s13[is.na(MPCareer_clust$s13)]<- 99
MPCareer_clust$s14[is.na(MPCareer_clust$s14)]<- 99
MPCareer_clust$s15[is.na(MPCareer_clust$s15)]<- 99
MPCareer_clust$s16[is.na(MPCareer_clust$s16)]<- 99
MPCareer_clust$s17[is.na(MPCareer_clust$s17)]<- 99
MPCareer_clust$s18[is.na(MPCareer_clust$s18)]<- 99
fviz_nbclust(MPCareer_clust[c(4:21)], hcut, method = "wss") +
geom_vline(xintercept = 4, linetype = 2)
##
clusters <- hclust(dist(MPCareer_clust[c(3:20)],method = "euclidean"))
clusterCut <- cutree(clusters, 8)
MPCareer_clust$newclust8<-clusterCut
##
MPCareer_merged<-merge(MPCareer, MPCareer_clust, by=c("id"), all.x=TRUE)
tbl<-table(MPCareer_merged$clust8, MPCareer_merged$newclust8)
chisq.test(tbl)
View(MPCareer_merged)
View(MPCareer_merged)
##6 Dec 2017
###############Sequence Analysis MP Career####################
##############################################################
setwd("C:/Users/matiav/Documents/Conference and Publications/Research and Politics 1/1R&R/sequence analysis")
#install.packages("readstata13")
#install.packages("TraMineR")
#install.packages("foreign")
#install.packages("cluster")
#install.packages("nnet")
#install.packages("texreg")
#install.packages("dcolumn")
#install.packages("booktabs")
#install.packages("ggplot2")
#install.packages("MASS")
#install.packages("Hmisc")
#install.packages("reshape2")
#install.packages("TraMineRextras")
#install.packages("readxl")
#install.packages("plm")
#install.packages("lmtest")
#install.packages("WeightedCluster")
#install.packages("dcolumn")
#install.packages("booktabs")
#install.packages("Rgraphviz")
#install.packages("gower")
#install.packages("Rtsne")
#install.packages("dendextend")
#install.packages("factoextra ")
library(factoextra)
library(dendextend)
library(dplyr) # for data cleaning
library(cluster) # for gower similarity and pam
library(Rtsne) # for t-SNE plot
library(ggplot2) # for visualization
library(gower)
library(readstata13)
library(TraMineR)
library(foreign)
library(cluster)
library(nnet)
library(texreg)
library(dcolumn)
library(booktabs)
require(foreign)
require(ggplot2)
require(MASS)
require(Hmisc)
require(reshape2)
library(WeightedCluster)
library(Rgraphviz)
library(TraMineRextras)
library(readxl)
library(plyr)
library(plm)
library(lmtest)
library(futile.logger)
MPCareer = read_excel("dataset 27_8_14 checked 10_5_16.xlsx")
print(MPCareer)
MPCareer <- MPCareer[MPCareer$party_n != 3, ]
for(level in unique(MPCareer$legislature)){         #create dummies for legislature
MPCareer[paste("dummyleg", level, sep = "_")] <- ifelse(MPCareer$legislature == level, 1, 0)
}
for(level in unique(MPCareer$party_n)){         #create dummies for party
MPCareer[paste("dummyparty", level, sep = "_")] <- ifelse(MPCareer$party_n == level, 1, 0)
}
for(level in unique(MPCareer$uni)){         #create dummies for uni
MPCareer[paste("dummyuni", level, sep = "_")] <- ifelse(MPCareer$uni == level, 1, 0)
}
MPCareer.inside.labels <- c("Backbench", "Frontbench", "Great Office")
MPCareer.inside.scode <- c(0,1,2)
MPCareer.inside.seq <- seqdef(MPCareer, 28:45, states = MPCareer.inside.scode,labels=MPCareer.inside.labels, missing.color="white", left="DEL")
pal <- grey(0:8/8)
###distance matrix
submat <- seqsubm(MPCareer.inside.seq, method = "TRATE", with.missing=FALSE)
dist.om1 <- seqdist(MPCareer.inside.seq, method = "OM",sm = submat, with.missing=TRUE)
###clustering - 5 clusters
wardClust <- hclust(as.dist(dist.om1), method = "ward.D")
wardRange1 <- as.clustrange(wardClust, diss=dist.om1, ncluster = 5)
summary(wardRange1, max.rank = 5)
MPCareer$clust5 <- factor(wardRange1$clustering$cluster5, levels = c(1, 2, 3, 4, 5), labels = c("Career 1", "Career 2", "Career 3", "Career 4", "Career 5"))
png("Figure1.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust5, border = NA, withlegend = T)
dev.off()
png("Figure1bw.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust5, border = NA, withlegend = T, cpal=pal)
dev.off()
###clustering - 4 clusters
wardRange2 <- as.clustrange(wardClust, diss=dist.om1, ncluster = 4)
summary(wardRange2, max.rank = 10)
MPCareer$clust4 <- factor(wardRange2$clustering$cluster4, levels = c(1, 2, 3, 4), labels = c("Career 1", "Career 2", "Career 3", "Career 4"))
png("Figure2.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust4, border = NA, withlegend = T)
dev.off()
png("Figure2bw.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust4, border = NA, withlegend = T, cpal=pal)
dev.off()
###clustering - 6 clusters
wardRange3 <- as.clustrange(wardClust, diss=dist.om1, ncluster = 6)
summary(wardRange3, max.rank = 6)
MPCareer$clust6 <- factor(wardRange3$clustering$cluster6, levels = c(1, 2, 3, 4, 5, 6), labels = c("Career 1", "Career 2", "Career 3", "Career 4", "Career 5", "Career 6"))
png("Figure3.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust6, border = NA, withlegend = T)
dev.off()
png("Figure3bw.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust6, border = NA, withlegend = T, cpal=pal)
dev.off()
###clustering - 3 clusters
wardRange4 <- as.clustrange(wardClust, diss=dist.om1, ncluster = 3)
summary(wardRange4, max.rank = 10)
MPCareer$clust3 <- factor(wardRange1$clustering$cluster3, levels = c(1, 2, 3), labels = c("Career 1", "Career 2", "Career 3"))
png("Figure4.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust3, border = NA, withlegend = T)
dev.off()
png("Figure4bw.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust3, border = NA, withlegend = T, cpal=pal)
dev.off()
###clustering - 7 clusters
wardRange5 <- as.clustrange(wardClust, diss=dist.om1, ncluster = 7)
summary(wardRange5, max.rank = 7)
MPCareer$clust7 <- factor(wardRange5$clustering$cluster7, levels = c(1, 2, 3, 4, 5, 6, 7), labels = c("Career 1", "Career 2", "Career 3", "Career 4", "Career 5", "Career 6", "Career 7"))
png("Figure5.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust7, border = NA, withlegend = T)
dev.off()
png("Figure5bw.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust7, border = NA, withlegend = T, cpal=pal)
dev.off()
###clustering - 8 clusters
wardRange6 <- as.clustrange(wardClust, diss=dist.om1, ncluster = 8)
summary(wardRange6, max.rank = 20)
MPCareer$clust8 <- factor(wardRange6$clustering$cluster8, levels = c(1, 2, 3, 4, 5, 6, 7, 8), labels = c("Career 1", "Career 2", "Career 3", "Career 4", "Career 5", "Career 6", "Career 7", "Career 8"))
png("Figure6.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust8, border = NA, withlegend = T)
dev.off()
png("Figure6bw.png",width = 10, height = 8, units = 'in', res = 1000)
seqdplot(MPCareer.inside.seq, group = MPCareer$clust8, border = NA, withlegend = T, cpal=pal)
dev.off()
##Discrepancy Analyis
set.seed(1)
print(dissmfacw(dist.om1 ~ gender + state_school + job +
age_at_entry + byelections + dummyleg_1+ dummyleg_2+ dummyleg_3+dummyuni_1+ dummyuni_2+dummyparty_1+ dummyparty_2, data=MPCareer, R=200))
###
library(foreign)
write.dta(MPCareer, "C:/Users/matiav/Documents/Conference and Publications/Research and Politics 1/1R&R/sequence analysis/MPCareer.dta")
###
###Oxbridge
MPCareer_uni <- MPCareer[ which(MPCareer$uni!=0), ]
MPCareer_uni$Oxbridge<-1
MPCareer_uni$Oxbridge[is.na(MPCareer_uni$`Oxbridge college1`)&is.na(MPCareer_uni$`Oxbridge college2`)&is.na(MPCareer_uni$`Oxbridge college3`)] <- 0
table(MPCareer_uni$Oxbridge)
###
library(foreign)
write.dta(MPCareer_uni, "C:/Users/matiav/Documents/Conference and Publications/Research and Politics 1/1R&R/sequence analysis/MPCareer_uni.dta")
###
###Hierarchical Clustering Alone
set.seed(1234)
library(data.table)
MPCareer_clust <- MPCareer[c(1,2,3,28:45)]
setnames(MPCareer_clust, old=c("1",  "2",  "3",  "4",  "5",  "6",  "7",  "8"  ,"9",  "10", "11", "12", "13", "14" ,"15" ,"16", "17", "18"), new=c("s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",  "s8"  ,"s9",  "s10", "s11", "s12", "s13", "s14" ,"s15" ,"s16", "s17", "s18"))
MPCareer_clust$s1[is.na(MPCareer_clust$s1)]<- 99
MPCareer_clust$s2[is.na(MPCareer_clust$s2)]<- 99
MPCareer_clust$s3[is.na(MPCareer_clust$s3)]<- 99
MPCareer_clust$s4[is.na(MPCareer_clust$s4)]<- 99
MPCareer_clust$s5[is.na(MPCareer_clust$s5)]<- 99
MPCareer_clust$s6[is.na(MPCareer_clust$s6)]<- 99
MPCareer_clust$s7[is.na(MPCareer_clust$s7)]<- 99
MPCareer_clust$s8[is.na(MPCareer_clust$s8)]<- 99
MPCareer_clust$s9[is.na(MPCareer_clust$s9)]<- 99
MPCareer_clust$s10[is.na(MPCareer_clust$s10)]<- 99
MPCareer_clust$s11[is.na(MPCareer_clust$s11)]<- 99
MPCareer_clust$s12[is.na(MPCareer_clust$s12)]<- 99
MPCareer_clust$s13[is.na(MPCareer_clust$s13)]<- 99
MPCareer_clust$s14[is.na(MPCareer_clust$s14)]<- 99
MPCareer_clust$s15[is.na(MPCareer_clust$s15)]<- 99
MPCareer_clust$s16[is.na(MPCareer_clust$s16)]<- 99
MPCareer_clust$s17[is.na(MPCareer_clust$s17)]<- 99
MPCareer_clust$s18[is.na(MPCareer_clust$s18)]<- 99
fviz_nbclust(MPCareer_clust[c(4:21)], hcut, method = "wss") +
geom_vline(xintercept = 4, linetype = 2)
##
clusters <- hclust(dist(MPCareer_clust[c(3:20)],method = "euclidean"))
clusterCut <- cutree(clusters, 8)
MPCareer_clust$newclust8<-clusterCut
##
MPCareer_merged<-merge(MPCareer, MPCareer_clust, by=c("id"), all.x=TRUE)
tbl<-table(MPCareer_merged$clust8, MPCareer_merged$newclust8)
chisq.test(tbl)
